---
title: "gender pay gap uk"
author: "Tomasz Olczyk"
date: "3/30/2024"
output: html_document
params:
  category:
    value: all
    choices: [all, admin & organisation,
              care & education, creative & media, law & justice,
              manual work, sales & serving others, science, tech & engineering,
              senior managers & execs]
---
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Download the pay-gap table (one row per occupation) straight from GitHub.
pay_gap <- readr::read_csv(
  file = "https://raw.githubusercontent.com/Tomasz-Olczyk/wizualizacjaR/main/case%20studies%20/pay_gap_uk.csv"
)
## Rows: 81 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): occupation, category, pay_gap_as_a_percentage
## dbl (3): women_average_annual_salary, men_average_annual_salary, pay_gap
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows of the raw data.
head(pay_gap, n = 6L)
## # A tibble: 6 × 6
## occupation category women_average_annual…¹ men_average_annual_s…² pay_gap
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Admin & organi… admin &… 20272 24691 4419
## 2 Receptionists admin &… 12009 13281 1272
## 3 Secretaries admin &… 14614 15315 701
## 4 Admin admin &… 14594 18729 4135
## 5 Stock control admin &… 17271 20538 3267
## 6 Government adm… admin &… 19287 23047 3760
## # ℹ abbreviated names: ¹women_average_annual_salary, ²men_average_annual_salary
## # ℹ 1 more variable: pay_gap_as_a_percentage <chr>
# Wide table: one row per occupation with the women's and men's average
# salaries side by side (used to draw the connecting segments).
dane_gap <- pay_gap %>%
  mutate(category = factor(category)) %>%
  select(
    occupation,
    women = women_average_annual_salary,
    men = men_average_annual_salary,
    pay_gap,
    category
  )
# Long table: two rows per occupation (one per gender) with the salary in a
# single column (used to draw the points).
dane_long <- pay_gap %>%
  mutate(category = factor(category)) %>%
  select(
    occupation,
    pay_gap,
    category,
    women = women_average_annual_salary,
    men = men_average_annual_salary
  ) %>%
  pivot_longer(
    cols = all_of(c("women", "men")),
    names_to = "gender",
    values_to = "salary"
  )
head(dane_long, n = 6L)
## # A tibble: 6 × 5
## occupation pay_gap category gender salary
## <chr> <dbl> <fct> <chr> <dbl>
## 1 Admin & organisation 4419 admin & organisation women 20272
## 2 Admin & organisation 4419 admin & organisation men 24691
## 3 Receptionists 1272 admin & organisation women 12009
## 4 Receptionists 1272 admin & organisation men 13281
## 5 Secretaries 701 admin & organisation women 14614
## 6 Secretaries 701 admin & organisation men 15315
# Two-colour palette for the gender points, built from RGB triplets (0-255).
kolor1 <- rgb(red = 80, green = 27, blue = 60, maxColorValue = 255)
kolor2 <- rgb(red = 186, green = 195, blue = 106, maxColorValue = 255)
# NOTE(review): `kolory` ends up as a copy of `kolor1`; it looks like a
# leftover of an unfinished assignment - confirm whether it is still needed.
kolory <- kolor1
skala <- c(kolor1, kolor2)
# Dumbbell chart of average annual salary by occupation: one point per gender,
# joined by a segment, for the category chosen in the report parameters.
#
# Fixes compared with the previous version:
# * `params$category == "all"` (the default) now keeps every category instead
#   of filtering all rows away, because no row has the category "all".
# * The legend guide is passed by name (`guide =`); passed positionally it was
#   not received as the `guide` argument of the scale.
# * The segment layer now uses the same salary-based ordering as the points.
#   The discrete y scale takes its order from the first layer that trains it,
#   and that layer used plain `occupation`, so the axis stayed alphabetical.
x <-
  ggplot(
    data = dane_long %>%
      filter(params$category == "all" | category == params$category),
    aes(y = reorder(occupation, salary))
  ) +
  geom_segment(
    data = dane_gap %>%
      filter(params$category == "all" | category == params$category),
    aes(
      # Mean of the two salaries equals the per-occupation mean of `salary`
      # in the long table, so both layers agree on the ordering.
      y = reorder(occupation, (women + men) / 2),
      yend = reorder(occupation, (women + men) / 2),
      x = women,
      xend = men
    )
  ) +
  geom_point(aes(x = salary, colour = gender)) +
  scale_color_manual(
    values = skala,
    guide = guide_legend(position = "top")
  ) +
  theme_void()
x
# Dumbbell chart of salaries by occupation, ordered by the size of the pay
# gap, for the category chosen in the report parameters.
#
# Fixes compared with the previous version:
# * `params$category == "all"` (the default) now keeps every category instead
#   of producing an empty plot.
# * The segment layer is filtered to the same category as the points; before,
#   segments were drawn for every occupation in the data set.
# * The segment layer uses the same `pay_gap` ordering as the points, so the
#   ordering is actually applied to the y axis.
y <- dane_long %>%
  filter(params$category == "all" | category == params$category) %>%
  ggplot(aes(y = reorder(occupation, pay_gap))) +
  geom_segment(
    data = dane_gap %>%
      filter(params$category == "all" | category == params$category),
    aes(
      y = reorder(occupation, pay_gap),
      yend = reorder(occupation, pay_gap),
      x = women,
      xend = men
    )
  ) +
  geom_point(aes(x = salary, colour = gender))
y
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Convert the static ggplot into an interactive plotly widget.
plotly::ggplotly(p = y)